Initial check-in to support PV balloon driver within HVM guests.
author Steven Hand <steven@xensource.com>
Wed, 4 Apr 2007 18:59:10 +0000 (19:59 +0100)
committer Steven Hand <steven@xensource.com>
Wed, 4 Apr 2007 18:59:10 +0000 (19:59 +0100)
Still todo:

 - fix mapcache invalidation (should happen in Xen)
 - support 32-on-64 mode correctly

Signed-off-by: Steven Hand <steven@xensource.com>
16 files changed:
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c
unmodified_drivers/linux-2.6/Makefile
unmodified_drivers/linux-2.6/balloon/Kbuild [new file with mode: 0644]
unmodified_drivers/linux-2.6/balloon/Makefile [new file with mode: 0644]
unmodified_drivers/linux-2.6/mkbuildtree
unmodified_drivers/linux-2.6/platform-pci/xen_support.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/irq.c
xen/arch/x86/hvm/svm/svm.c
xen/arch/x86/hvm/vmx/vmx.c
xen/common/memory.c
xen/include/asm-x86/hvm/hvm.h
xen/include/asm-x86/hvm/support.h

index 39f6e24524940a9a4de7084c36a3b8c4f2447a22..c052097889e392be3e1f53f14273d3f2fe11a6a0 100644 (file)
 #include <asm/hypervisor.h>
 #include <xen/balloon.h>
 #include <xen/interface/memory.h>
+#include <asm/maddr.h>
+#include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
+#include <linux/highmem.h>
 #include <linux/list.h>
 #include <xen/xenbus.h>
 #include "common.h"
 
+#ifndef CONFIG_XEN 
+#define scrub_pages(_p,_n)
+#endif
+
 #ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *balloon_pde;
 #endif
@@ -217,6 +224,7 @@ static int increase_reservation(unsigned long nr_pages)
 
                set_phys_to_machine(pfn, frame_list[i]);
 
+#ifdef CONFIG_XEN
                /* Link back into the page tables if not highmem. */
                if (pfn < max_low_pfn) {
                        int ret;
@@ -226,6 +234,7 @@ static int increase_reservation(unsigned long nr_pages)
                                0);
                        BUG_ON(ret);
                }
+#endif
 
                /* Relinquish the page back to the allocator. */
                ClearPageReserved(page);
@@ -242,6 +251,8 @@ static int increase_reservation(unsigned long nr_pages)
        return 0;
 }
 
+extern void xen_invalidate_foreign_mappings(void);
+
 static int decrease_reservation(unsigned long nr_pages)
 {
        unsigned long  pfn, i, flags;
@@ -275,7 +286,7 @@ static int decrease_reservation(unsigned long nr_pages)
                                (unsigned long)v, __pte_ma(0), 0);
                        BUG_ON(ret);
                }
-#ifdef CONFIG_XEN_SCRUB_PAGES
+#ifdef CONFIG_XEN
                else {
                        v = kmap(page);
                        scrub_pages(v, 1);
@@ -284,19 +295,24 @@ static int decrease_reservation(unsigned long nr_pages)
 #endif
        }
 
+#ifdef CONFIG_XEN
        /* Ensure that ballooned highmem pages don't have kmaps. */
        kmap_flush_unused();
        flush_tlb_all();
+#endif
 
        balloon_lock(flags);
 
        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
                pfn = mfn_to_pfn(frame_list[i]);
+#ifdef CONFIG_XEN
                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+#endif
                balloon_append(pfn_to_page(pfn));
        }
 
+        xen_invalidate_foreign_mappings(); 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
@@ -446,7 +462,7 @@ static struct notifier_block xenstore_notifier;
 
 static int __init balloon_init(void)
 {
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86) && defined(CONFIG_XEN) 
        unsigned long pfn;
        struct page *page;
 #endif
@@ -456,8 +472,12 @@ static int __init balloon_init(void)
 
        IPRINTK("Initialising balloon driver.\n");
 
+#ifdef CONFIG_XEN
        bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
        totalram_pages   = bs.current_pages;
+#else 
+        bs.current_pages = totalram_pages; 
+#endif
        bs.target_pages  = bs.current_pages;
        bs.balloon_low   = 0;
        bs.balloon_high  = 0;
@@ -479,7 +499,7 @@ static int __init balloon_init(void)
 #endif
        balloon_sysfs_init();
 
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86) && defined(CONFIG_XEN) 
        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
                page = pfn_to_page(pfn);
@@ -498,6 +518,14 @@ static int __init balloon_init(void)
 
 subsys_initcall(balloon_init);
 
+static void balloon_exit(void) 
+{
+    /*
+     * XXX - release balloon here.
+     * Module-exit stub (registered through module_exit() just below):
+     * for now it returns immediately and releases nothing.
+     */
+    return; 
+}
+
+module_exit(balloon_exit); 
+
 void balloon_update_driver_allowance(long delta)
 {
        unsigned long flags;
@@ -507,6 +535,7 @@ void balloon_update_driver_allowance(long delta)
        balloon_unlock(flags);
 }
 
+#ifdef CONFIG_XEN
 static int dealloc_pte_fn(
        pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
 {
@@ -524,6 +553,7 @@ static int dealloc_pte_fn(
        BUG_ON(ret != 1);
        return 0;
 }
+#endif
 
 struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 {
@@ -559,8 +589,13 @@ struct page **alloc_empty_pages_and_pagevec(int nr_pages)
                        if (ret == 1)
                                ret = 0; /* success */
                } else {
+#ifdef CONFIG_XEN
                        ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
                                                  dealloc_pte_fn, NULL);
+#else 
+                        /* cannot handle non-auto translate mode */
+                        ret = 1; 
+#endif
                }
 
                if (ret != 0) {
@@ -576,7 +611,9 @@ struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 
  out:
        schedule_work(&balloon_worker);
+#ifdef CONFIG_XEN
        flush_tlb_all();
+#endif
        return pagevec;
 
  err:
index 68bf60ddd8c65e6c6cc567126278b65dabf8ccf4..463a9f7e3b78dfc8bcd1138dd2bd9a4af9e5b505 100644 (file)
@@ -29,6 +29,7 @@
  */
 
 #include <linux/capability.h>
+#include <linux/errno.h>
 #include <linux/stat.h>
 #include <linux/sysdev.h>
 #include "common.h"
index 07588d36a3b399eeff64fc5aaa46630b9ad01594..2b2d11f77b057b445f8152a16a375a64f5640922 100644 (file)
@@ -407,11 +407,13 @@ grow_nomem:
 static unsigned int __max_nr_grant_frames(void)
 {
        struct gnttab_query_size query;
-       int rc;
+       int rc = -1;
 
        query.dom = DOMID_SELF;
 
+#ifdef CONFIG_XEN
        rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
+#endif
        if ((rc < 0) || (query.status != GNTST_okay))
                return 4; /* Legacy max supported number of frames */
 
index fe9311be66e8387b91b295530cdfd6d386b04c64..74242732eaf0642fde0b9cba421f83281380a3ca 100644 (file)
@@ -1,4 +1,5 @@
 
+#include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <xen/xen_proc.h>
 
@@ -12,7 +13,11 @@ struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
        return create_proc_entry(name, mode, xen_base);
 }
 
+EXPORT_SYMBOL_GPL(create_xen_proc_entry); 
+
 /* Remove the proc entry called 'name' from under xen_base by calling remove_proc_entry(). */
 void remove_xen_proc_entry(const char *name)
 {
        remove_proc_entry(name, xen_base);
 }
+
+EXPORT_SYMBOL_GPL(remove_xen_proc_entry); 
index 119016f53162bcfd89629a3745d7127b4e2ecbe7..6722df5ac6e617d028c421a10c4ceca2a512dfb2 100644 (file)
@@ -2,6 +2,7 @@ include $(M)/overrides.mk
 
 obj-m += platform-pci/
 obj-m += xenbus/
+obj-m += balloon/
 obj-m += blkfront/
 obj-m += netfront/
 obj-m += util/
diff --git a/unmodified_drivers/linux-2.6/balloon/Kbuild b/unmodified_drivers/linux-2.6/balloon/Kbuild
new file mode 100644 (file)
index 0000000..bcc8b05
--- /dev/null
@@ -0,0 +1,9 @@
+include $(M)/overrides.mk
+
+obj-m  = xen-balloon.o
+
+EXTRA_CFLAGS += -I$(M)/platform-pci
+
+xen-balloon-objs =
+xen-balloon-objs += balloon.o
+xen-balloon-objs += sysfs.o
diff --git a/unmodified_drivers/linux-2.6/balloon/Makefile b/unmodified_drivers/linux-2.6/balloon/Makefile
new file mode 100644 (file)
index 0000000..64e7acd
--- /dev/null
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
index 289e63b03a1ba72cf7069c7f5b588f7cafd46d07..31415928cdeb6d78c7dd442e00d3fb51fc05a4ce 100755 (executable)
@@ -17,6 +17,9 @@ for d in $(find ${XL}/drivers/xen/ -maxdepth 1 -type d | sed -e 1d); do
     if ! echo $d | egrep -q back; then
         lndir $d $(basename $d) > /dev/null 2>&1
     fi
+    if ! echo $d | egrep -q ball; then
+        lndir $d $(basename $d) > /dev/null 2>&1
+    fi
 done
 
 ln -sf ${XL}/drivers/xen/core/gnttab.c platform-pci
@@ -47,6 +50,7 @@ i[34567]86)
     ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypercall.h include/asm
     ln -sf ${XL}/include/asm-i386/mach-xen/asm/synch_bitops.h include/asm
     ln -sf ${XL}/include/asm-i386/mach-xen/asm/maddr.h include/asm
+    ln -sf ${XL}/include/asm-i386/mach-xen/asm/page.h include/asm
   ;;
 "ia64")
     ln -sf ${XL}/include/asm-ia64/hypervisor.h include/asm
index 431115a8cc924c386b7e10848a4178c57007a4f1..bb0bdc07220c009c6cb6a1ff4b5b72d1bffdb99d 100644 (file)
@@ -59,12 +59,3 @@ void xen_machphys_update(unsigned long mfn, unsigned long pfn)
 }
 EXPORT_SYMBOL(xen_machphys_update);
 
-void balloon_update_driver_allowance(long delta)
-{
-}
-EXPORT_SYMBOL(balloon_update_driver_allowance);
-
-void balloon_release_driver_page(struct page *page)
-{
-}
-EXPORT_SYMBOL(balloon_release_driver_page);
index 735f32fc3f3e49b0d7e8005f9bc80d2824a7147c..66478c5800cbb83428b6ee644864edb77fb45975 100644 (file)
@@ -521,12 +521,12 @@ static hvm_hypercall_t *hvm_hypercall_table[NR_hypercalls] = {
     HYPERCALL(hvm_op)
 };
 
-void hvm_do_hypercall(struct cpu_user_regs *pregs)
+int hvm_do_hypercall(struct cpu_user_regs *pregs)
 {
     if ( unlikely(ring_3(pregs)) )
     {
         pregs->eax = -EPERM;
-        return;
+        return 0;
     }
 
     if ( (pregs->eax >= NR_hypercalls) || !hvm_hypercall_table[pregs->eax] )
@@ -535,11 +535,21 @@ void hvm_do_hypercall(struct cpu_user_regs *pregs)
                 current->domain->domain_id, current->vcpu_id,
                 pregs->eax);
         pregs->eax = -ENOSYS;
-        return;
+        return 0;
     }
 
+    /* Install a canary value in pregs->eip so we can check for a continuation */
+    pregs->eip |= 0xF; 
+
     pregs->eax = hvm_hypercall_table[pregs->eax](
         pregs->ebx, pregs->ecx, pregs->edx, pregs->esi, pregs->edi);
+
+    /* XXX: put a fake I/O instruction here to tell the emulator to flush the mapcache */
+
+    if( (pregs->eip & 0xF) == 0 ) /* preempted */
+        return 1; 
+
+    return 0; 
 }
 
 #else /* defined(__x86_64__) */
@@ -599,12 +609,12 @@ static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
     HYPERCALL(event_channel_op)
 };
 
-void hvm_do_hypercall(struct cpu_user_regs *pregs)
+int hvm_do_hypercall(struct cpu_user_regs *pregs)
 {
     if ( unlikely(ring_3(pregs)) )
     {
         pregs->rax = -EPERM;
-        return;
+        return 0;
     }
 
     pregs->rax = (uint32_t)pregs->eax; /* mask in case compat32 caller */
@@ -614,7 +624,7 @@ void hvm_do_hypercall(struct cpu_user_regs *pregs)
                 current->domain->domain_id, current->vcpu_id,
                 pregs->rax);
         pregs->rax = -ENOSYS;
-        return;
+        return 0;
     }
 
     if ( current->arch.paging.mode->guest_levels == 4 )
@@ -633,6 +643,7 @@ void hvm_do_hypercall(struct cpu_user_regs *pregs)
                                                        (uint32_t)pregs->esi,
                                                        (uint32_t)pregs->edi);
     }
+    return 0; /* XXX SMH: fix for preempt here */
 }
 
 #endif /* defined(__x86_64__) */
index a85151eb267c81c87b68ae540b05b3eea4f1afa7..c6c3e7c90635947b64ab767be918efc2a6e7a326 100644 (file)
@@ -347,11 +347,7 @@ int hvm_local_events_need_delivery(struct vcpu *v)
 
     pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v));
     if ( unlikely(pending) )
-    {
-        struct cpu_user_regs regs;
-        hvm_store_cpu_guest_regs(v, &regs, NULL);
-        pending = !irq_masked(regs.eflags);
-    }
+        pending = hvm_interrupts_enabled(v); 
 
     return pending;
 }
index 5f03dba8843fd43ae33326ed37923369118dee75..8b25f5019bba64e62050935dff18356f1eca809a 100644 (file)
@@ -498,6 +498,12 @@ static int svm_realmode(struct vcpu *v)
     return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
 }
 
+static int svm_interrupts_enabled(struct vcpu *v) /* SVM implementation of the hvm_function_table interrupts_enabled hook */
+{
+    unsigned long eflags = v->arch.hvm_svm.vmcb->rflags; /* rflags field of this vcpu's VMCB */
+    return !irq_masked(eflags); /* nonzero when irq_masked() says these flags do not mask interrupts */
+}
+
 static int svm_guest_x86_mode(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -800,6 +806,7 @@ static struct hvm_function_table svm_function_table = {
     .paging_enabled       = svm_paging_enabled,
     .long_mode_enabled    = svm_long_mode_enabled,
     .pae_enabled          = svm_pae_enabled,
+    .interrupts_enabled   = svm_interrupts_enabled,
     .guest_x86_mode       = svm_guest_x86_mode,
     .get_guest_ctrl_reg   = svm_get_ctrl_reg,
     .get_segment_base     = svm_get_segment_base,
@@ -2268,8 +2275,8 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
         inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
         ASSERT(inst_len > 0);
         HVMTRACE_1D(VMMCALL, v, regs->eax);
-        __update_guest_eip(vmcb, inst_len);
-        hvm_do_hypercall(regs);
+        if(hvm_do_hypercall(regs) == 0) /* not preempted */
+            __update_guest_eip(vmcb, inst_len);
         break;
 
     case VMEXIT_CR0_READ:
index e1fe8290e5ea630228e11a1c0c38dce9577fafa7..d7c700ca24336ccc884341ff3fa3789c6c2d1ac4 100644 (file)
@@ -957,6 +957,13 @@ static int vmx_pae_enabled(struct vcpu *v)
     return (vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE));
 }
 
+static int vmx_interrupts_enabled(struct vcpu *v) /* VMX implementation of the hvm_function_table interrupts_enabled hook */
+{
+    unsigned long eflags = __vmread(GUEST_RFLAGS); /* NOTE(review): 'v' is unused and __vmread() takes no vcpu, so this presumably reads the currently loaded VMCS -- confirm callers only pass the current vcpu */
+    return !irq_masked(eflags); /* nonzero when irq_masked() says these flags do not mask interrupts */
+}
+
+
 static void vmx_update_host_cr3(struct vcpu *v)
 {
     ASSERT( (v == current) || !vcpu_runnable(v) );
@@ -1030,6 +1037,7 @@ static struct hvm_function_table vmx_function_table = {
     .paging_enabled       = vmx_paging_enabled,
     .long_mode_enabled    = vmx_long_mode_enabled,
     .pae_enabled          = vmx_pae_enabled,
+    .interrupts_enabled   = vmx_interrupts_enabled,
     .guest_x86_mode       = vmx_guest_x86_mode,
     .get_guest_ctrl_reg   = vmx_get_ctrl_reg,
     .get_segment_base     = vmx_get_segment_base,
@@ -2620,8 +2628,8 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
     {
         HVMTRACE_1D(VMMCALL, v, regs->eax);
         inst_len = __get_instruction_length(); /* Safe: VMCALL */
-        __update_guest_eip(inst_len);
-        hvm_do_hypercall(regs);
+        if(hvm_do_hypercall(regs) == 0)        /* not preempted */
+            __update_guest_eip(inst_len);
         break;
     }
     case EXIT_REASON_CR_ACCESS:
index 5adef9b07c3eb97ee5ae584ec2dff8fe0279b8ac..139a0e89b3d503a24b17b47fbf0b53825c65c86d 100644 (file)
@@ -176,12 +176,7 @@ int guest_remove_page(struct domain *d, unsigned long gmfn)
     if ( unlikely((page->count_info & PGC_count_mask) != 1) )
     {
         shadow_drop_references(d, page);
-        /* We'll make this a guest-visible error in future, so take heed! */
-        if ( (page->count_info & PGC_count_mask) != 1 )
-            gdprintk(XENLOG_INFO, "Dom%d freeing in-use page %lx "
-                     "(pseudophys %lx): count=%lx type=%lx\n",
-                     d->domain_id, mfn, get_gpfn_from_mfn(mfn),
-                     (unsigned long)page->count_info, page->u.inuse.type_info);
+        /* NB: still may have foreign references to the page at this stage */
     }
 
     guest_physmap_remove_page(d, gmfn, mfn);
index 4872505ef3af6ece75401f815a109b2a0c1f0f2c..60f44194c8dc3ab30c85bb5906d6c28c7e1fa7f2 100644 (file)
@@ -93,13 +93,15 @@ struct hvm_function_table {
      * 1) determine whether paging is enabled,
      * 2) determine whether long mode is enabled,
      * 3) determine whether PAE paging is enabled,
-     * 4) determine the mode the guest is running in,
-     * 5) return the current guest control-register value
-     * 6) return the current guest segment descriptor base
+     * 4) determine whether interrupts are enabled or not,
+     * 5) determine the mode the guest is running in,
+     * 6) return the current guest control-register value
+     * 7) return the current guest segment descriptor base
      */
     int (*paging_enabled)(struct vcpu *v);
     int (*long_mode_enabled)(struct vcpu *v);
     int (*pae_enabled)(struct vcpu *v);
+    int (*interrupts_enabled)(struct vcpu *v);
     int (*guest_x86_mode)(struct vcpu *v);
     unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
     unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
@@ -189,6 +191,12 @@ hvm_pae_enabled(struct vcpu *v)
     return hvm_funcs.pae_enabled(v);
 }
 
+static inline int
+hvm_interrupts_enabled(struct vcpu *v) /* vendor-neutral wrapper around the interrupts_enabled hook */
+{
+    return hvm_funcs.interrupts_enabled(v); /* forwards v to, and returns the result of, whichever implementation hvm_funcs holds */
+}
+
 static inline int
 hvm_guest_x86_mode(struct vcpu *v)
 {
index 3d7b3a43bbd26b1fcd6d0fb86d53aaf7691215fb..9880f920a09559baeb6a11d33bac642771d7cdd8 100644 (file)
@@ -228,7 +228,7 @@ int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size);
 void hvm_print_line(struct vcpu *v, const char c);
 void hlt_timer_fn(void *data);
 
-void hvm_do_hypercall(struct cpu_user_regs *pregs);
+int hvm_do_hypercall(struct cpu_user_regs *pregs);
 
 void hvm_hlt(unsigned long rflags);
 void hvm_triple_fault(void);